********************************************************************************
* This program generates the main regression dataset
*
* Authors: Fowlie and Reguant
* August 2020
********************************************************************************

clear all 
set more off, perm


********************************************************************************
* 				PART A. Merge All data set									   *
********************************************************************************


********************************************************************************
* Price (and the balanced panel)
********************************************************************************
* 1. price
use "$energy_price/energy_price_industry_year.dta", clear
drop sector naics_mecs

* merge in domestic indirect price
preserve
	use "$energy_price/energy_price_industry_year_indirect.dta", clear
	rename naics naics2007
	keep naics2007 naics2012 year ind_price 
	tempfile indirectprice
	save `indirectprice', replace
restore
merge 1:1 naics2007 naics2012 year using `indirectprice'
drop if _merge == 1
drop _merge 
rename ind_price price_TIV_ind

* merge in foreign indirect price
preserve
	use "$energy_price/energy_foreignprice_industry_year_indirect.dta", clear
	rename naics naics2007
	keep naics2007 naics2012 year ind_price_imp ind_price_exp 
	tempfile indirectprice
	save `indirectprice', replace
restore
merge 1:1 naics2007 naics2012 year using `indirectprice'
drop _merge 
rename ind_price_imp price_imp_ind
rename ind_price_exp price_exp_ind

********************************************************************************
* Dependent variable
********************************************************************************

* I. shipment *************************************
{
* 1. NBER shipment (1990-2011) and other economic variables
preserve
	use "$buildpath/input/shipments/NBER_shipments_naics1997_1958-2011.dta", clear
	keep naics year vship emp pay prode prodh prodw matcost vadd invest invent
	rename vship shipment
	rename * *_nber
	label var shipment_nber "Total value of shipments in $1m, nominal"
	rename naics_nber naics
	rename year_nber year
	
	** Uncomment if want to stick to 98 original industries
	*replace naics = 31131 if floor(naics/10) ==  31131
	*replace naics = 31181 if floor(naics/10) ==  31181
	*collapse (sum)  shipment_nber, by(naics year)

	keep if year >= 1989
	tempfile nber
	save `nber.dta', replace
restore

merge m:1 naics year using `nber.dta'
drop if _merge == 2
drop _merge


* 3. ASM shipment (2002-2017)
preserve
	use "$ASM/ASM.dta", clear
	keep if level == 6 | naicsid_str == "31131" | naicsid_str == "31181"
	
	keep naicsid naicsid_str naicsid_5digit year ///
		 shipment emp pay prode prodh prodw matcost vadd invest invent
	
	rename * *_asm
	rename naics*_asm naics*
	rename year_asm year
	
	replace shipment_asm = shipment_asm/1000
	label var shipment_asm "Total value of shipments in $1m"
	replace emp_asm = emp_asm/1000
	label var emp_asm "Number of employees in 1000s"
	replace pay_asm = pay_asm/1000
	label var pay_asm "Annual payroll in $1m"
	replace prode_asm = prode_asm/1000
	label var prode_asm "Production workers avg per year in 1000s"
	replace prodh_asm = prodh_asm/1000
	label var prodh_asm "Production workers hours in 1m"
	replace prodw_asm = prodw_asm/1000
	label var prodw_asm "Production workers wages in $1m"
	replace matcost_asm = matcost_asm/1000
	label var matcost_asm "Total cost of materials $1m"
	replace vadd_asm   = vadd_asm/1000
	label var vadd_asm "Value added $1m"
	replace invest_asm = invest_asm/1000
	label var invest_asm "Total capital expenditures (new and used) $1m"
	replace invent_asm = invent_asm/1000
	label var invent_asm "Total inventories, end of year $1m"
	
	drop naicsid 
	rename naicsid_str naics_asm
	tempfile asm
	save `asm.dta', replace
restore

* construct asm merge key
do "$buildpath/code/subfunctions/genASMmergekey.do"

* merge
merge m:1 naics_asm year using `asm.dta'
drop if _merge == 2
drop _merge

* 4. clean variables and set the measurement unit consistent
preserve
	keep naics_asm naics
	duplicates drop
	bysort naics_asm: gen N = _N
	
	tempfile numberofsubindustries
	save `numberofsubindustries.dta', replace
restore

merge m:1 naics_asm naics using `numberofsubindustries.dta'
label var N  "number of NAICS6 industries within this NAICS_asm "
drop _merge 

}
* II. trade data *************************************
{
replace naics = naics2007 if year <= 2012 & naics != 339111
tostring naics, gen(naics_str)

replace naics_str = "31131X" if floor(naics/10) == 31131
replace naics_str = "31181X" if floor(naics/10) == 31181
replace naics_str = "31511X" if floor(naics/10) == 31511 & year <= 2012
replace naics_str = "33631X" if floor(naics/10) == 31511 & year <= 2012

label var naics_str "89-12: 2007 code, 13-17: 2012 code"
merge m:1 naics_str year using "$Schott/trade_naics-89-17.dta"
	replace imports = imports/1000000
	replace exports = exports/1000000
label var imports "Value of imports in $1m, nominal"
label var exports "Value of exports in $1m, nominal"
drop if _merge == 2
drop census_naics 
drop _merge
replace naics = naics2012 if year >= 2011 & naics != 339111
}

********************************************************************************
* Independent variables
********************************************************************************

* 1. energy intensity 
merge m:1 naics2012 using "$EI/energy_intensity_2007.dta", keep(1 3)
drop _merge
drop intensity_old
order naics naics2007 industryname2007 naics2012 industryname2012 naics_str naics_asm N naicsid_5digit naics_3digit year shipment_nber shipment_asm imports exports

* indirect energy intensities
preserve
	use "$EI/naics_intensity_comp_2007.dta", clear
	rename naics naics2007
	keep naics2007 naics2012 total_intensity
	tempfile indirectintensities
	save `indirectintensities', replace
restore

merge m:1 naics2007 naics2012 using `indirectintensities'
drop _merge


merge m:1 naics2012 year using "$EI/energy_intensity_industry_year.dta", keepusing(intensity_ppidef) nogen
rename intensity_ppidef ei_tv

* 2. domestic emission intensities
preserve
	use "$EI/emissionintensities_industry_year.dta", clear
	tempfile carbonintensities
	save `carbonintensities.dta', replace
restore
merge m:1 naics2007 naics2012 naics year using `carbonintensities.dta'
drop _merge
drop census_naics sector naics_mecs fuelshare_ISnaics6level CarbonI_TV CarbonI_TINV annual_avg_emplvl annual_avg_emplvl_us
replace naics = naics2007 if year == 2011 & naics != 339111

* 3. foreign energy price
keep if year >= 1995 & year <= 2015
merge m:1 naics_str year using "$energy_price/importexport_energy_price_industry_year_region-level.dta"
drop if _merge == 2
drop _merge
drop exports_nonmissing_rate imports_nonmissing_rate naics_5digit
* the exports_total and imports_total here do not exactly == the imports and exports value we already merged in as dependent var
* b.c. 1. here the values are at naics_str level
*      2. here missings are all = 0, becuase the main purpose of these 2 variables is weights for the foreign prices.

* 4. world energy prices
preserve
	import excel using "$buildpath/input/energypriceWB/CMOHistoricalDataAnnual.xlsx", sheet("Annual Prices (Real)") cellrange(A7:CD68) firstrow clear 
	rename A year
	keep year Crudeoilaverage
	drop if year == .
	destring Crudeoilaverage, replace
	rename Crudeoilaverage oilprice_world
	label var oilprice_world "World average crude oil price, 2010USD/bbl"
	tempfile oilprice
	save `oilprice.dta', replace
restore

merge m:1 year using `oilprice.dta'
drop _merge

* 5. wages
replace naics = naics2012 if year >= 2011
merge m:1 naics year using "$QCEW/QCEW_ind_year.dta"
drop if _merge == 2
drop _merge annual_avg_emplvl
drop annual_avg_estabs
replace naics = naics2007 if year == 2011 & naics != 339111
drop if naics == 339111 | naics == . | naics == 31131 | naics == 31181

* 6. world exchange rates
preserve
	import delimited using "$buildpath/input/exchangerate/TWEXB.csv", clear
	gen year = real(substr(date,1,4))
	collapse (mean) twexb, by(year)
	label var twexb "Trade Weighted U.S. Dollar Index: Broad, Goods"
	tempfile exchangerate
	save `exchangerate.dta', replace
restore
merge m:1 year using `exchangerate.dta', keep(3)
drop _merge

********************************************************************************
* 				PART B. create variables/deflate/cleaning					   *
********************************************************************************

********************************************************************************
* Use Deflators
********************************************************************************
merge m:1 year using "$buildpath/output/deflator/deflator_year.dta", keep(1 3)
drop _merge

* naics 2007 & naics2012, collapsing later for a homogeneous shipment measure
rename naics naics_1
gen naics = naics2007
replace naics = naics2012 if year >= 2010
merge m:1 naics year using "$buildpath/output/deflator/deflator_industry_year.dta"
drop if _merge == 2
drop _merge
drop naics
rename naics_1 naics

* generate variables - dependent
gen shipments = shipment_nber
replace shipments = shipment_asm if year >= 2012 & N == 1 & shipment_asm != .

gen shipments_2 = shipments / ppidef_ipol
gen imports_2   = imports / ppidef_ipol
gen exports_2   = exports / ppidef_ipol
label var shipments_2 "Total value of shipments in $1m, ppidef_ipol"
label var imports_2 "Total value of imports in $1m, ppidef_ipol"
label var exports_2 "Total value of exports in $1m, ppidef_ipol"

replace shipments = shipments / gdpdef
replace imports   = imports / gdpdef
replace exports   = exports / gdpdef
label var shipments "Total value of shipments in $1m, gdpdef"
label var imports "Total value of imports in $1m, gdpdef"
label var exports "Total value of exports in $1m, gdpdef"

order shipments shipments_2 imports_2 exports_2, b(shipment_nber)

* generate variables - independent
replace price_TIV      = price_TIV / gdpdef
replace price_TIV_ind  = price_TIV_ind / gdpdef
replace price_TV       = price_TV / gdpdef
replace price_elec     = price_elec / gdpdef
replace price_fuel_TIV = price_fuel_TIV / gdpdef
replace price_fuel_TV  = price_fuel_TV / gdpdef
replace oilprice_world = oilprice_world / 1.0391885
replace price_imports  = price_imports / 1.0391885
replace price_exports  = price_exports / 1.0391885

label var price_imports "2007USD per million Btu"
label var price_exports "2007USD per million Btu"
label var oilprice_world "2007USD per bbl"
replace oilprice_world = oilprice_world / 5.8
label var oilprice_world "2007USD per million Btu"
* 1.0391885: 2010 USD to 2007 USD


* generate variables - controls
replace total_annual_wages = total_annual_wages / gdpdef

foreach var in emp pay prode prodh prodw matcost vadd invest invent {
	gen `var' = `var'_nber
	local l : variable label `var'_nber
	label var `var' "`l'"
	replace `var' = `var'_asm if year >= 2012 & N == 1 & shipment_asm != .
}
*
foreach var in pay prodw matcost vadd invest invent {
	replace `var' = gdpdef * `var'
}
*
********************************************************************************
* Collapse to NAICS2012 level
********************************************************************************
{
* (1) 
*the indirect energy intensites and indirect energy prices are both at NAICS2007 level, need to be aggregated to NAICS2012
preserve
	keep if year == 2007
	keep naics2007 naics2012 total_intensity shipments
	replace total_intensity = total_intensity*shipments
	collapse (sum) total_intensity shipments, by(naics2012)
	replace total_intensity = total_intensity/shipments
	label var total_intensity "million BTU/ $1m shipment"
	keep naics2012 total_intensity
	tempfile totalintensities
	save `totalintensities', replace
restore
drop total_intensity
merge m:1 naics2012 using `totalintensities'
drop _merge 
order total_intensity, b(intensity)

* (2)	
*NAICS code year: 2007code; 2012code
* shipments (and other controls from NBER+ASM): -2011; 2012+
* trade + international price: -2012; 2013+
* wage: -2010; 2011+

* 1. shipment data (and other controls from NBER+ASM)
preserve
	keep naics2012 year shipments shipments_2 emp pay prode prodh prodw matcost vadd invest invent
	keep if year <= 2011
	bysort naics2012 year (shipments): 	gen allmissing_shipments = missing(shipments[1])
	bysort naics2012 year (shipments_2): gen allmissing_shipments_2 = missing(shipments_2[1])
	
	foreach var in emp pay prode prodh prodw matcost vadd invest invent {
		bysort naics2012 year (`var'): gen allmissing_`var' = missing(`var'[1])
	}
	*
	
	collapse (sum) shipments shipments_2 emp pay prode prodh prodw matcost vadd invest invent (mean) allmissing*, by(naics2012 year)
	
	replace shipments 	= .   if allmissing_shipments==1
	replace shipments_2 = .   if allmissing_shipments_2==1
	
	foreach var in emp pay prode prodh prodw matcost vadd invest invent {
		replace `var' = . 		if allmissing_`var' == 1
	}
	*
	
	drop allmissing*
	
	rename ship* ship*_pre2011
	foreach var in emp pay prode prodh prodw matcost vadd invest invent {
		rename `var' `var'_pre2011
	}
	*
	
	tempfile pre2011shipments
	save `pre2011shipments.dta', replace
restore

preserve
	keep naics2012 year shipments shipments_2 emp pay prode prodh prodw matcost vadd invest invent
	merge m:1 naics2012 year using `pre2011shipments.dta'
	drop _merge
	replace shipments = shipments_pre2011 		if year <= 2011
	replace shipments_2 = shipments_2_pre2011 	if year <= 2011
	foreach var in emp pay prode prodh prodw matcost vadd invest invent {
		replace `var' = `var'_pre2011			if year <= 2011
	}
	*
	
	drop *pre*
	duplicates drop
	tempfile shipments
	save `shipments.dta', replace
restore

* 2. trade + international price + indirect international prices: -2012; 2013+
preserve
	keep naics2012 year imports exports imports_2 exports_2 price_imports price_exports imports_total exports_total
	keep if year <= 2012
	
	* track missings
	bysort naics2012 year (imports): 	gen allmissing_imports = missing(imports[1])
	bysort naics2012 year (exports): 	gen allmissing_exports = missing(exports[1])
	bysort naics2012 year (imports_2): 	 gen allmissing_imports_2 = missing(imports_2[1])
	bysort naics2012 year (exports_2): 	 gen allmissing_exports_2 = missing(exports_2[1])

	replace price_imports = price_imports * imports_total
	replace price_exports = price_exports * exports_total
	bysort naics2012 year (price_imports): gen allmissing_imp_p = missing(price_imports[1])
	bysort naics2012 year (price_exports): gen allmissing_exp_p = missing(price_exports[1])

	collapse (sum) imports exports imports_2 exports_2 price_imports price_exports imports_total exports_total ///
					(mean) allmissing*, by(naics2012 year)

	replace imports = . 	  if allmissing_imports==1
	replace exports = . 	  if allmissing_exports==1

	replace imports_2 = . 	  if allmissing_imports_2==1
	replace exports_2 = . 	  if allmissing_exports_2==1
	
	replace price_imports = . if allmissing_imp_p==1
	replace price_exports = . if allmissing_exp_p==1

	drop allmissing*

	gen price_foreign = (price_imports + price_exports)/(imports_total+exports_total)
	replace price_imports = price_imports/imports_total
	replace price_exports = price_exports/exports_total

	rename *ports* *ports*_pre2012
	rename price_f* price_f*_pre2012
	tempfile pre2012trade
	save `pre2012trade.dta', replace
restore

preserve
	keep naics2012 year price_imp_ind price_exp_ind imports_total exports_total

	replace price_imp_ind = price_imp_ind * imports_total
	replace price_exp_ind = price_exp_ind * exports_total
	bysort naics2012 year (price_imp_ind): gen allmissing_imp_indp = missing(price_imp_ind[1])
	bysort naics2012 year (price_exp_ind): gen allmissing_exp_indp = missing(price_exp_ind[1])

	collapse (sum) price_imp_ind price_exp_ind imports_total exports_total ///
					(mean) allmissing*, by(naics2012 year)
	
	replace price_imp_ind = . if allmissing_imp_indp==1
	replace price_exp_ind = . if allmissing_exp_indp==1
	drop allmissing*
	
	gen price_foreign_ind = (price_imp_ind + price_exp_ind)/(imports_total+exports_total)
	replace price_imp_ind = price_imp_ind/imports_total
	replace price_exp_ind = price_exp_ind/exports_total
	
	duplicates drop
	tempfile ind_foreignprice
	save `ind_foreignprice.dta', replace
restore

preserve
	keep naics2012 year imports exports imports_2 exports_2 price_imports price_exports imports_total exports_total
	merge m:1 naics2012 year using `pre2012trade.dta'
	drop _merge
	replace imports   = imports_pre2012   if year <= 2012
	replace imports_2 = imports_2_pre2012 if year <= 2012
	replace exports   = exports_pre2012   if year <= 2012
	replace exports_2 = exports_2_pre2012 if year <= 2012
	
	gen price_foreign = (price_imports*imports_total + price_exports*exports_total)/(imports_total+exports_total)
	replace price_imports = price_imports_pre2012 if year <= 2012
	replace price_exports = price_exports_pre2012 if year <= 2012
	replace price_foreign = price_foreign_pre2012 if year <= 2012
	
	replace imports_total = imports_total_pre2012 if year <= 2012
	replace exports_total = exports_total_pre2012 if year <= 2012
	
	drop *pre*
	label var price_foreign "Weighted foreign price, 2007USD per million Btu"
	duplicates drop
	
	merge 1:1 naics2012 year  using `ind_foreignprice.dta'
	drop _merge
	
	tempfile trade
	save `trade.dta', replace
restore

* 3. wage -2010; 2011+
preserve
	keep naics2012 year total_annual_wages
	keep if year <= 2010
	
	bysort naics2012 year (total_annual_wages): 	gen allmissing_wage = missing(total_annual_wages[1])
	
	collapse (sum) total_annual_wages (mean) allmissing_wage, by(naics2012 year)
	
	replace total_annual_wages = . 	  if allmissing_wage==1
	
	drop allmissing*
	rename *wage* *wage*_pre2010
	tempfile pre2010wage
	save `pre2010wage.dta', replace
restore

preserve
	keep naics2012 year total_annual_wages
	merge m:1 naics2012 year using `pre2010wage.dta'
	drop _merge
	replace total_annual_wages   = total_annual_wages_pre2010   if year <= 2010
	
	duplicates drop
	drop *pre2010
	tempfile wage
	save `wage.dta', replace
restore

* 4. other variables (already at NAICS2012 level)
* keep all the variables at NAICS2012 level and (price_TIV_ind, shipments) to generate 2012 level shipment weighted indirect price
keep naics2012 naics year shipments ///
		price_TIV price_TIV_ind price_fuel_TIV price_TV price_fuel_TV price_elec ///
		total_intensity intensity intensity_2 ei_tv CarbonI_TINV_2012NAICS CarbonI_TV_2012NAICS ///
		annual_avg_emplvl_2012NAICS annual_avg_emplvl_us_2012NAICS oilprice_world twexb

rename CarbonI_TV_2012NAICS CarbonI_TV
rename CarbonI_TINV_2012NAICS CarbonI_TINV 
rename annual_avg_emplvl_2012NAICS annual_avg_emplvl
rename annual_avg_emplvl_us_2012NAICS annual_avg_emplvl_us
replace annual_avg_emplvl = annual_avg_emplvl/1000
replace annual_avg_emplvl_us = annual_avg_emplvl_us/1000

label var annual_avg_emplvl "collapsed state annual_avg_emplvl in 1000s, from QCEW"
label var annual_avg_emplvl_us "national level annual_avg_emplvl in 1000s, from QCEW"


preserve
	keep naics2012 naics year price_TIV_ind shipments
	
	replace price_TIV_ind = price_TIV_ind*shipments
	* 17 missings due to missing shipments
	collapse (sum) price_TIV_ind shipments, by(naics2012 year)
	replace price_TIV_ind = price_TIV_ind/shipments
	keep naics2012 year price_TIV_ind
	label var price_TIV_ind "indirect price [time-invariant fuel share] for NAICS2012 industries, $ per mi"
	tempfile totalprice
	save `totalprice', replace
restore 
drop price_TIV_ind shipments naics

duplicates drop
merge 1:1 naics2012 year using `totalprice'
drop _merge

* 5. merge together
merge 1:1 naics2012 year using `shipments.dta'
drop _merge
merge 1:1 naics2012 year using `trade.dta'
drop _merge
merge 1:1 naics2012 year using `wage.dta'
drop _merge

drop imports_total exports_total

order naics2012 year ///
		shipments imports exports shipments_2 imports_2 exports_2 ///
		price_TIV price_TIV_ind price_TV price_elec price_fuel_TIV price_fuel_TV ///
		intensity total_intensity intensity_2 CarbonI_TINV CarbonI_TV ///
		price_imports price_exports price_foreign price_foreign_ind oilprice_world ///
		total_annual_wages annual_avg_emplvl annual_avg_emplvl_us 
}
********************************************************************************
* Weights
********************************************************************************

preserve
	keep if year == 1995
	drop year
	gen wgt_prod_95 = shipments
	gen wgt_imp_95 = imports
	gen wgt_exp_95 = exports
	label var wgt_prod_95 "1995 value of shipments in $1m, nominal"
	label var wgt_imp_95 "1995 value of shipments in $1m, nominal"
	label var wgt_exp_95 "1995 value of shipments in $1m, nominal"
	
	keep naics2012 wgt*
	duplicates drop
	
	tempfile regression weight
	save `regression weight.dta', replace
restore

merge m:1 naics2012 using `regression weight.dta', keep(1 3)
drop _merge


preserve
	keep if year == 2010
	drop year
	gen wgt_prod_10 = shipments
	gen wgt_imp_10 = imports
	gen wgt_exp_10 = exports
	label var wgt_prod_10 "2010 value of shipments in $1m, nominal"
	label var wgt_imp_10 "2010 value of shipments in $1m, nominal"
	label var wgt_exp_10 "2010 value of shipments in $1m, nominal"
	
	keep naics2012 wgt*
	duplicates drop
	
	tempfile regression weight
	save `regression weight.dta', replace
restore

merge m:1 naics2012 using `regression weight.dta', keep(1 3)
drop _merge

********************************************************************************
* Creating variables 
********************************************************************************

gen ln_val_prod = log(shipments)
gen ln_val_imp = log(imports)
gen ln_val_exp = log(exports)

gen ln_qty_prod = log(shipments_2)
gen ln_qty_imp = log(imports_2)
gen ln_qty_exp = log(exports_2)

gen trade_ratio = (imports + exports)/(imports + shipments)
label var trade_ratio "(M+X)/(M+S)"
gen net_trade = (imports - exports)/(shipments)
label var net_trade "(M-X)/S"

* Create trade intensity (with alternative measure)
gen te_temp = trade_ratio if year==2007
egen te_2007 = mean(te_temp), by(naics)
drop te_temp

* Some rescaling
replace intensity = intensity/1000
label var intensity "million Btu/ $000 shipment"
replace total_intensity = total_intensity/1000
label var total_intensity "million Btu/ $000 shipment"


gen ln_p_oil = log(oilprice_world)
gen ln_ei = log(intensity)
label var ln_ei "log(energy intensities in [million Btu/ $1m shipment])"
gen ln_ei_tot = log(total_intensity)
label var ln_ei "log(total energy intensities in [million Btu/ $1m shipment])"
gen ln_ei_2 = log(intensity_2)
label var ln_ei "log(energy intensities in [$ expenditure / $ shipment])"
gen ln_ci = log(CarbonI_TINV)
label var ln_ci "log(energy intensities in [Kg per million Btu])"
gen ln_ci_2 = log(CarbonI_TV)
label var ln_ci_2 "log(energy intensities with time-variant fuel share in [Kg per million Btu])"

rename intensity ei
rename total_intensity ei_tot
rename intensity_2 ei_2
rename CarbonI_TINV ci
rename CarbonI_TV ci_tvshare

* generate variables - independent
gen ln_p_energy = log(price_TIV)
gen ln_p_energy_ei = log(price_TIV)*ei
gen ln_p_energy_ei2= log(price_TIV)*ei*ei
gen ln_p_energy_te = log(price_TIV)*te_2007
gen ln_p_energy_ind = log(price_TIV_ind)
gen ln_p_elec 	= log(price_elec)
gen ln_p_elec_ei= log(price_elec)*ei
gen ln_p_elec_ei2= log(price_elec)*ei*ei
gen ln_p_elec_te= log(price_elec)*te_2007
gen ln_p_fuel 	= log(price_fuel_TIV)

gen ln_p_energy_tvshare = log(price_TV)
gen ln_p_fuel_tvshare = log(price_fuel_TV)
gen ln_fep_imp = log(price_imports)
gen ln_fep_exp = log(price_exports)
gen ln_fep     = log(price_foreign)
gen ln_fep_ind = log(price_foreign_ind)
gen ln_fep_ei = ln_fep*ei 

gen ln_wage = log(total_annual_wages/annual_avg_emplvl_us)

********************************************************************************
* Final cleaning
********************************************************************************
rename naics2012 naics
keep if year >= 1995 & year <= 2015

tsset naics year

* drop if missing and ensure observations for all y variables
drop if year <=1995
keep if ln_val_prod != . & ln_val_imp != . & ln_val_exp != .

* drop if only few years in the sample
sort naics year
by naics: gen obs = _N
drop if obs < 15
drop obs

* Direct energy price
gen ln_p_energy_dir = ln_p_energy
gen ln_fep_dir = ln_fep

* Weights
gen wgt_prod_07 = shipments if year == 2007
gen wgt_imp_07 = imports if year == 2007
gen wgt_exp_07 = exports if year == 2007
sort naics wgt_prod_07
by naics: replace wgt_prod_07 = wgt_prod_07[1]
by naics: replace wgt_imp_07 = wgt_imp_07[1]
by naics: replace wgt_exp_07 = wgt_exp_07[1]

gen wgt_prod_95_ei = wgt_prod_95 * ei
gen wgt_prod_07_ei = wgt_prod_07 * ei
gen wgt_prod_10_ei = wgt_prod_10 * ei

gen wgt_value_95 = wgt_prod_95 + wgt_imp_95
gen wgt_value_07 = wgt_prod_07 + wgt_imp_07
gen wgt_value_10 = wgt_prod_10 + wgt_imp_10

gen wgt_trade_07 = wgt_imp_07+wgt_exp_07
gen wgt_trade_10 = wgt_imp_10+wgt_exp_10

* Generate alternative dependent variables (levels)
sort naics year
replace net_trade = net_trade * shipments / (wgt_imp_07 + wgt_exp_07)

gen rel_shipments = shipments / wgt_prod_07
gen ln_val_con = log(shipments-exports+imports)
gen imp_share = imports/(imports+shipments)
gen dom_share = shipments/(imports+shipments)
replace net_trade = (imports-exports)/(imports+shipments)
gen net_trade2 =  (imports-exports)/(wgt_imp_07+wgt_prod_07)


* Generate categories
gen ei_cat = .
_pctile ei, percentiles(50 75 90 99)
replace ei_cat = 1 if ei <= `r(r2)'
replace ei_cat = 2 if ei > `r(r2)' & ei <= `r(r3)'
replace ei_cat = 3 if ei > `r(r3)' & ei != .

gen ei_tot_cat = .
_pctile ei_tot, percentiles(50 75 90 99)
replace ei_tot_cat = 1 if ei_tot <= `r(r2)'
replace ei_tot_cat = 2 if ei_tot > `r(r2)' & ei_tot <= `r(r3)'
replace ei_tot_cat = 3 if ei_tot > `r(r3)' & ei_tot != .
	
gen te_cat = .
_pctile te_2007, percentiles(10 33 66 99)
replace te_cat = 1 if te_2007 <= `r(r2)'
replace te_cat = 2 if te_2007 > `r(r2)' & te_2007 <= `r(r3)'
replace te_cat = 3 if te_2007 > `r(r3)' & te_2007 != .
	
gen ei_tv_cat = .
_pctile ei_tv, percentiles(50 75 90 99)
replace ei_tv_cat = 1 if ei_tv <= `r(r2)'
replace ei_tv_cat = 2 if ei_tv > `r(r2)' & ei_tv <= `r(r3)'
replace ei_tv_cat = 3 if ei_tv > `r(r3)' & ei_tv != .

save "$buildpath/output/regressions_dataset.dta", replace
